# -*- coding: utf-8 -*-
import scrapy
import os

class ItcastSpider(scrapy.Spider):
    """Spider for the teacher listing page on itcast.cn.

    Every teacher entry found on the page is converted into a dict and
    appended, one ``str(dict)`` per line, to the file named by
    ``output_path``.
    """

    name = 'itcast'
    allowed_domains = ['itcast.cn']
    start_urls = ['http://www.itcast.cn/channel/teacher.shtml']

    # File that parse() appends the extracted records to (relative to the
    # current working directory).
    output_path = 'itcast'

    def parse(self, response):
        """Extract teacher records from ``response`` and append them to disk.

        Args:
            response: The downloaded page; only its ``xpath()`` method is used.

        Returns:
            None. The extracted records are written to ``output_path`` as a
            side effect; nothing is written when no entries are matched.
        """
        # 1. Locate the list items that hold the teacher details.
        teacher_nodes = response.xpath("//*/div[2]/div/ul/li")

        # 2. Pull name / level / description out of every list item.
        #    extract_first() yields None when a node has no matching text.
        records = [
            {
                '姓名': node.xpath('.//h3/text()').extract_first(),
                '等级': node.xpath('.//h4/text()').extract_first(),
                '详情': node.xpath('.//p/text()').extract_first(),
            }
            for node in teacher_nodes
        ]

        # Nothing matched: do not create or touch the output file.
        if not records:
            return

        # Append mode creates the file when it is missing, so no existence
        # check or mkdir is needed. Open it once for the whole batch and pin
        # the encoding so the non-ASCII text is written the same on every
        # platform.
        with open(self.output_path, 'a', encoding='utf-8') as f:
            f.writelines(str(record) + '\n' for record in records)

